In part one of this notebook we see how well we can reproduce Kd from simulated experimental data with our Bayesian methods, these are the same as those used in the quickmodel.py
script.
In part two of this notebook we will see if we can reproduce some of the problems we've been having with the real experimental data.
In [1]:
import numpy as np
import matplotlib.pyplot as plt
from scipy import optimize
import seaborn as sns
%pylab inline
We use the same setup here as we do in the 'Simulating Experimental Fluorescence Binding Data' notebook.
In [2]:
# We define a Kd,
Kd = 2e-9 # M
# a protein concentration,
Ptot = 1e-9 * np.ones([12],np.float64) # M
# and a gradient of ligand concentrations for our experiment.
Ltot = 20.0e-6 / np.array([10**(float(i)/2.0) for i in range(12)]) # M
In [3]:
def two_component_binding(Kd, Ptot, Ltot):
"""
Parameters
----------
Kd : float
Dissociation constant
Ptot : float
Total protein concentration
Ltot : float
Total ligand concentration
Returns
-------
P : float
Free protein concentration
L : float
Free ligand concentration
PL : float
Complex concentration
"""
PL = 0.5 * ((Ptot + Ltot + Kd) - np.sqrt((Ptot + Ltot + Kd)**2 - 4*Ptot*Ltot)) # complex concentration (uM)
P = Ptot - PL; # free protein concentration in sample cell after n injections (uM)
L = Ltot - PL; # free ligand concentration in sample cell after n injections (uM)
return [P, L, PL]
In [4]:
[L, P, PL] = two_component_binding(Kd, Ptot, Ltot)
In [5]:
# y will be complex concentration
# x will be total ligand concentration
plt.semilogx(Ltot,PL, 'o')
plt.xlabel('$[L]_{tot}$ / M')
plt.ylabel('$[PL]$ / M')
plt.ylim(0,1.3e-9)
plt.axhline(Ptot[0],color='0.75',linestyle='--',label='$[P]_{tot}$')
plt.legend();
In [6]:
# Making max 1400 relative fluorescence units, and scaling all of PL (complex concentration)
# to that, adding some random noise
npoints = len(Ltot)
sigma = 10.0 # size of noise
F_PL_i = (1400/1e-9)*PL + sigma * np.random.randn(npoints)
In [7]:
# y will be complex concentration
# x will be total ligand concentration
plt.semilogx(Ltot,F_PL_i, 'ro')
plt.xlabel('$[L]_{tot}$ / M')
plt.ylabel('$Fluorescendce$')
plt.legend();
In [8]:
#Let's add an F_background just so we don't ever go below zero
F_background = 40
#We also need to model fluorescence for our ligand
F_L_i = F_background + (.4/1e-8)*Ltot + sigma * np.random.randn(npoints)
#Let's also add these to our complex fluorescence readout
F_PL_i = F_background + ((1400/1e-9)*PL + sigma * np.random.randn(npoints)) + ((.4/1e-8)*L + sigma * np.random.randn(npoints))
In [9]:
# y will be complex concentration
# x will be total ligand concentration
plt.semilogx(Ltot,F_PL_i, 'ro')
plt.semilogx(Ltot,F_L_i, 'ko')
plt.xlabel('$[L]_{tot}$ / M')
plt.ylabel('$Fluorescence$')
plt.legend();
In [10]:
# We know errors from our pipetting instruments.
P_error = 0.35
L_error = 0.08
assay_volume = 100e-6 # assay volume, L
dPstated = P_error * Ptot
dLstated = L_error * Ltot
In [11]:
# Now we'll use our Bayesian modeling scheme from assaytools.
from assaytools import pymcmodels
pymc_model = pymcmodels.make_model(Ptot, dPstated, Ltot, dLstated,
top_complex_fluorescence=F_PL_i,
top_ligand_fluorescence=F_L_i,
use_primary_inner_filter_correction=True,
use_secondary_inner_filter_correction=True,
assay_volume=assay_volume, DG_prior='uniform')
In [12]:
mcmc = pymcmodels.run_mcmc(pymc_model)
In [13]:
import matplotlib.patches as mpatches #this is for plotting with color patches
In [15]:
def mcmc_three_plots(pymc_model,mcmc,Lstated):
sns.set(style='white')
sns.set_context('talk')
import pymbar
[t,g,Neff_max] = pymbar.timeseries.detectEquilibration(mcmc.DeltaG.trace())
interval= np.percentile(a=mcmc.DeltaG.trace()[t:], q=[2.5, 50.0, 97.5])
[hist,bin_edges] = np.histogram(mcmc.DeltaG.trace()[t:],bins=40,normed=True)
binwidth = np.abs(bin_edges[0]-bin_edges[1])
#set colors for 95% interval
clrs = [(0.7372549019607844, 0.5098039215686274, 0.7411764705882353) for xx in bin_edges]
idxs = bin_edges.argsort()
idxs = idxs[::-1]
gray_before = idxs[bin_edges[idxs] < interval[0]]
gray_after = idxs[bin_edges[idxs] > interval[2]]
for idx in gray_before:
clrs[idx] = (.5,.5,.5)
for idx in gray_after:
clrs[idx] = (.5,.5,.5)
plt.clf();
plt.figure(figsize=(12,3));
plt.subplot(131)
property_name = 'top_complex_fluorescence'
complex = getattr(pymc_model, property_name)
property_name = 'top_ligand_fluorescence'
ligand = getattr(pymc_model, property_name)
for top_complex_fluorescence_model in mcmc.top_complex_fluorescence_model.trace()[::10]:
plt.semilogx(Lstated, top_complex_fluorescence_model, marker='.',color='silver')
for top_ligand_fluorescence_model in mcmc.top_ligand_fluorescence_model.trace()[::10]:
plt.semilogx(Lstated, top_ligand_fluorescence_model, marker='.',color='lightcoral', alpha=0.2)
plt.semilogx(Lstated, complex.value, 'ko',label='complex')
plt.semilogx(Lstated, ligand.value, marker='o',color='firebrick',linestyle='None',label='ligand')
#plt.xlim(.5e-8,5e-5)
plt.xlabel('$[L]_T$ (M)');
plt.yticks([])
plt.ylabel('fluorescence');
plt.legend(loc=0);
plt.subplot(132)
plt.bar(bin_edges[:-1]+binwidth/2,hist,binwidth,color=clrs, edgecolor = "white");
sns.kdeplot(mcmc.DeltaG.trace()[t:],bw=.4,color=(0.39215686274509803, 0.7098039215686275, 0.803921568627451),shade=False)
plt.axvline(x=interval[0],color=(0.5,0.5,0.5),linestyle='--')
plt.axvline(x=interval[1],color=(0.5,0.5,0.5),linestyle='--')
plt.axvline(x=interval[2],color=(0.5,0.5,0.5),linestyle='--')
plt.axvline(x=np.log(Kd),color='k')
plt.xlabel('$\Delta G$ ($k_B T$)',fontsize=16);
plt.ylabel('$P(\Delta G)$',fontsize=16);
#plt.xlim(-15,-8)
hist_legend = mpatches.Patch(color=(0.7372549019607844, 0.5098039215686274, 0.7411764705882353),
label = '$\Delta G$ = %.3g [%.3g,%.3g] $k_B T$'
%(interval[1],interval[0],interval[2]) )
plt.legend(handles=[hist_legend],fontsize=10,loc=0,frameon=True);
plt.subplot(133)
plt.plot(range(0,t),mcmc.DeltaG.trace()[:t], 'g.',label='equil. at %s'%t)
plt.plot(range(t,len(mcmc.DeltaG.trace())),mcmc.DeltaG.trace()[t:], '.')
plt.xlabel('MCMC sample');
plt.ylabel('$\Delta G$ ($k_B T$)');
plt.legend(loc=2);
plt.tight_layout();
return [t,interval,hist,bin_edges,binwidth]
In [20]:
Kd
Out[20]:
In [21]:
print 'Real Kd is 2nm or %s k_B T.' %np.log(Kd)
In [18]:
[t,interval,hist,bin_edges,binwidth] = mcmc_three_plots(pymc_model,mcmc,Ltot)
That works, but the equilibration seems to happen quite late in our sampling! Let's look at some of the other parameters.
In [22]:
well_area = 0.1586 # well area, cm^2 # half-area wells were used here
path_length = assay_volume / well_area
In [25]:
from assaytools import plots
plots.plot_mcmc_results(Ltot, Ptot, path_length, mcmc)
In [ ]:
The first thing we'd like to see is if we can reproduce problems with predicting the delta G for ligands with high affinity compared to the protein concentration used.
In [26]:
# For this we're just going to define a range of Kd's, with some qualitative variable names.
# Note here that our protein concentration is: Ptot = 1e-9 # M
Kd_high = 0.2e-9 # M
Kd_really_high = 0.02e-9 # M
Kd_low = 20e-9 # M
Kd_really_low = 200e-9 # M
In [27]:
# Will need an F_PL_i for all of these
[L_high, P_high, PL_high] = two_component_binding(Kd_high, Ptot, Ltot)
[L_really_high, P_really_high, PL_really_high] = two_component_binding(Kd_really_high, Ptot, Ltot)
[L_low, P_low, PL_low] = two_component_binding(Kd_low, Ptot, Ltot)
[L_really_low, P_really_low, PL_really_low] = two_component_binding(Kd_really_low, Ptot, Ltot)
F_PL_high_i = F_background + ((1400/1e-9)*PL_high + sigma * np.random.randn(npoints)) + ((.4/1e-8)*L_high + sigma * np.random.randn(npoints))
F_PL_really_high_i = F_background + ((1400/1e-9)*PL_really_high + sigma * np.random.randn(npoints)) + ((.4/1e-8)*L_really_high + sigma * np.random.randn(npoints))
F_PL_low_i = F_background + ((1400/1e-9)*PL_low + sigma * np.random.randn(npoints)) + ((.4/1e-8)*L_low + sigma * np.random.randn(npoints))
F_PL_really_low_i = F_background + ((1400/1e-9)*PL_really_low + sigma * np.random.randn(npoints)) + ((.4/1e-8)*L_really_low + sigma * np.random.randn(npoints))
In [28]:
pymc_model_high = pymcmodels.make_model(Ptot, dPstated, Ltot, dLstated,
top_complex_fluorescence=F_PL_high_i,
top_ligand_fluorescence=F_L_i,
use_primary_inner_filter_correction=True,
use_secondary_inner_filter_correction=True,
assay_volume=assay_volume, DG_prior='uniform')
In [29]:
mcmc_high = pymcmodels.run_mcmc(pymc_model_high)
In [30]:
pymc_model_really_high = pymcmodels.make_model(Ptot, dPstated, Ltot, dLstated,
top_complex_fluorescence=F_PL_really_high_i,
top_ligand_fluorescence=F_L_i,
use_primary_inner_filter_correction=True,
use_secondary_inner_filter_correction=True,
assay_volume=assay_volume, DG_prior='uniform')
mcmc_really_high = pymcmodels.run_mcmc(pymc_model_really_high)
In [31]:
pymc_model_low = pymcmodels.make_model(Ptot, dPstated, Ltot, dLstated,
top_complex_fluorescence=F_PL_low_i,
top_ligand_fluorescence=F_L_i,
use_primary_inner_filter_correction=True,
use_secondary_inner_filter_correction=True,
assay_volume=assay_volume, DG_prior='uniform')
mcmc_low = pymcmodels.run_mcmc(pymc_model_low)
In [32]:
pymc_model_really_low = pymcmodels.make_model(Ptot, dPstated, Ltot, dLstated,
top_complex_fluorescence=F_PL_really_low_i,
top_ligand_fluorescence=F_L_i,
use_primary_inner_filter_correction=True,
use_secondary_inner_filter_correction=True,
assay_volume=assay_volume, DG_prior='uniform')
mcmc_really_low = pymcmodels.run_mcmc(pymc_model_really_low)
In [53]:
plt.figure(figsize=(18,4));
plt.subplot(121)
property_name = 'top_complex_fluorescence'
complex = getattr(pymc_model, property_name)
complex_high = getattr(pymc_model_high, property_name)
complex_really_high = getattr(pymc_model_really_high, property_name)
complex_low = getattr(pymc_model_low, property_name)
complex_really_low = getattr(pymc_model_really_low, property_name)
property_name = 'top_ligand_fluorescence'
ligand = getattr(pymc_model, property_name)
for top_complex_fluorescence_model in mcmc.top_complex_fluorescence_model.trace()[::10]:
plt.semilogx(Ltot, top_complex_fluorescence_model, marker='.',color='lightcoral', alpha=0.2, hold=True)
for top_complex_fluorescence_model in mcmc_high.top_complex_fluorescence_model.trace()[::10]:
plt.semilogx(Ltot, top_complex_fluorescence_model, marker='.',color='cornflowerblue', alpha=0.2, hold=True)
for top_complex_fluorescence_model in mcmc_really_high.top_complex_fluorescence_model.trace()[::10]:
plt.semilogx(Ltot, top_complex_fluorescence_model, marker='.',color='powderblue', alpha=0.2, hold=True)
for top_complex_fluorescence_model in mcmc_low.top_complex_fluorescence_model.trace()[::10]:
plt.semilogx(Ltot, top_complex_fluorescence_model, marker='.',color='darkseagreen', alpha=0.2, hold=True)
for top_complex_fluorescence_model in mcmc_really_low.top_complex_fluorescence_model.trace()[::10]:
plt.semilogx(Ltot, top_complex_fluorescence_model, marker='.',color='palegreen', alpha=0.2, hold=True)
for top_ligand_fluorescence_model in mcmc.top_ligand_fluorescence_model.trace()[::10]:
plt.semilogx(Ltot, top_ligand_fluorescence_model, marker='.',color='silver', alpha=0.2, hold=True)
plt.semilogx(Ltot, complex_really_high.value, color='cyan',marker='o',linestyle='None',label='0.02 nM', hold=True)
plt.semilogx(Ltot, complex_high.value, 'bo',label='0.2 nM', hold=True)
plt.semilogx(Ltot, complex.value, 'ro',label='2 nM', hold=True)
plt.semilogx(Ltot, complex_low.value, color='green',marker='o',linestyle='None',label='20 nM', hold=True)
plt.semilogx(Ltot, complex_really_low.value, color='yellowgreen',marker='o',linestyle='None',label='200 nM', hold=True)
plt.semilogx(Ltot, ligand.value, 'ko',label='ligand', hold=True)
plt.xlabel('$[L]_T$ (M)');
plt.ylabel('$Fluorescence$');
plt.title('Simulated complex fluorescence varying ligand affinity')
plt.legend(loc=0);
plt.subplot(122)
plt.hist(mcmc_really_high.DeltaG.trace(),color='cyan', hold=True)
plt.axvline(x=np.log(Kd_really_high),color='cyan',label='0.02 nM', hold=True)
plt.hist(mcmc_high.DeltaG.trace(),color='blue', hold=True)
plt.axvline(x=np.log(Kd_high),color='blue',label='0.2 nM', hold=True)
plt.hist(mcmc.DeltaG.trace(),color='red', hold=True)
plt.axvline(x=np.log(Kd),color='red',label='2 nM', hold=True)
plt.hist(mcmc_low.DeltaG.trace(),color='green', hold=True)
plt.axvline(x=np.log(Kd_low),color='green',label='20 nM', hold=True)
plt.hist(mcmc_really_low.DeltaG.trace(),color='yellowgreen', hold=True)
plt.axvline(x=np.log(Kd_really_low),color='yellowgreen',label='200 nM', hold=True)
plt.axvline(x=np.log(Ptot[0]),color='0.7',linestyle='--',label='log(protein conc)', hold=True)
plt.xlabel('$\Delta G$ ($k_B T$)',fontsize=16);
plt.ylabel('$P(\Delta G)$',fontsize=16);
plt.legend(loc=2);
In [ ]:
In [ ]: